library(sf)
library(leaflet)
library(terra)
library(sp)
library(rnaturalearth)
library(dplyr)
library(leaflegend)
library(htmltools)
library(ggplot2)
library(plotly)
The database used in this study contains 3468 observations of the 11 variables briefly described in Table 1.
dados = read.csv2("C:/Users/vieir/OneDrive/Ambiente de Trabalho/UMinho/Mestrado - Estatística para Ciência de Dados/2º Ano/Projeto em Estatística/Dados/dados_completos.csv",
header = T,sep=";")
Table 1: Presentation of the variables in the Pelago database
| Variable | Description | Type |
|---|---|---|
| RAD | Radials - acoustic survey areas (from 1 to 22) | Categorical |
| cruzcod | Anonymous vessel identification | Categorical |
| Year | Year of the campaign (2015 to 2022) | Categorical |
| longdec | Longitude coordinate (in °) | Quantitative |
| latdec | Latitude coordinate (in °) | Quantitative |
| depth | Depth, estimated, of abundance (in m) | Quantitative |
| ANE.EA | Biomass index1 of anchovy (in m²/nmi²) | Quantitative |
| Day | Day of observation (from 1 to 30) | Date |
| Month | Month of observation (3 to 5) | Date |
| SST | Sea surface temperature (in °C) | Quantitative |
| CHL | Chlorophyll concentration in seawater (in mg/m³) | Quantitative |
| Presence | 0: Absence of anchovy 1: Presence of anchovy2 |
Categorical |
1 This index represents the Nautical Area-Scattering Coefficients (NASC) where nmi represents nautical miles.
2 The presence of anchovy is considered if the abundance value is strictly positive.
We have treated the data by assuming that the observations either have a strictly positive biomass index, indicating the presence of anchovy at that point, or do not have one, indicating its absence. It is important to note that not all variables are of interest in an exploratory analysis. In this case, the variables cruzcod, longdec, latdec, and Day are not of interest.
# Retirar Variáveis com pouco interesse
dados = dados[,-c(3,4,6,12)]
# Subset - apenas acima dos 39'
dados.sub <- dados[dados$latdec > 39,]
rownames(dados.sub) <- 1:nrow(dados.sub)
# Transformar as variaveis
dados.sub$long.utm = as.numeric(dados.sub$long.utm)
dados.sub$lat.utm = as.numeric(dados.sub$lat.utm)
dados.sub$year = as.factor(dados.sub$year)
dados.sub$RAD = as.factor(dados.sub$RAD)
dados.sub$Day = as.numeric(dados.sub$Day)
dados.sub$Month = as.factor(dados.sub$Month)
dados.sub$longdec = as.numeric(dados.sub$longdec)
dados.sub$latdec = as.numeric(dados.sub$latdec)
dados.sub$depth = as.numeric(dados.sub$depth)
dados.sub$ANE.EA = as.numeric(dados.sub$ANE.EA)
dados.sub$SST = as.numeric(dados.sub$SST)
dados.sub$CHL = as.numeric(dados.sub$CHL)
dados.sub$log_CHL = log(dados.sub$CHL)
# Retirar os níveis que não são usados
dados.sub$Month <- droplevels(dados.sub$Month)
dados.sub$RAD = droplevels(dados.sub$RAD)
# Ver presenças e ausências
dados.sub$presenca = ifelse(dados.sub$ANE.EA == 0, 0, 1)
# Tranformar em variavel categorica
dados.sub$presenca = as.factor(dados.sub$presenca)
Table 2: Anchovy presence and absence over different study years.
| Year | Presences | Absences | Observations | Presence Proportion (%) | Absence Proportion (%) | Mean Abundance (m²/nmi²) |
|---|---|---|---|---|---|---|
| 2015 | 12 | 378 | 390 | 3.08% | 96.9% | 702 |
| 2016 | 68 | 404 | 472 | 14.4% | 85.6% | 667 |
| 2017 | 23 | 442 | 465 | 4.95% | 95.1% | 597 |
| 2018 | 22 | 418 | 440 | 5% | 95% | 2482 |
| 2019 | 11 | 446 | 457 | 2.41% | 97.6% | 290 |
| 2020 | 55 | 345 | 400 | 13.8% | 86.2% | 877 |
| 2021 | 94 | 378 | 472 | 19.9% | 80.1% | 428 |
| 2022 | 280 | 92 | 372 | 75.3% | 24.7% | 317 |
| Total | 565 | 2903 | 3468 | 16.3% | 83.7% | 795 |
Table 2 presents the number of observations with the presence or absence of anchovy and the respective mean abundance (considering abundance as the value of the biomass index estimated by NASC, as mentioned in Table 3.1) in locations where anchovy were present.
Overall, 16.3% of the observations indicate the presence of anchovy. However, the proportion of presences has been above this value since 2020, with the proportion of presences being close to 75% in 2022.
Although the proportion of presences has increased post-2020, the mean abundance has been decreasing. This may indicate that the anchovy were found in smaller schools in 2021 or 2022, in contrast to 2018, where, although less distributed along the Portuguese coast, the anchovy were found in larger schools.
# Criar o gráfico para Temperatura (SST)
p1 <- ggplot(dados.sub, aes(x = presenca, y = SST, fill = presenca)) +
geom_boxplot() +
scale_fill_manual(values = c("red2", "green4"), labels = c("Absence", "Presence")) +
labs(x = "Presence", y = "Sea Surface Temperature (°C)", fill = "Presence") +
theme_minimal()
p1_interactive <- ggplotly(p1)
# Criar o gráfico para Clorofila (log(CHL))
p2 <- ggplot(dados.sub, aes(x = presenca, y = log(CHL), fill = presenca)) +
geom_boxplot() +
scale_fill_manual(values = c("red2", "green4"), labels = c("Absence", "Presence")) +
labs(x = "Presence", y = "Chlorophyll Concentration (log)", fill = "Presence") +
theme_minimal()
p2_interactive <- ggplotly(p2)
p1_interactive
p2_interactive
# Carregue os dados de fronteira de Portugal
portugal <- ne_countries(country = "Portugal", scale = "large", returnclass = "sf")
espanha <- ne_countries(country = "Spain", scale = "large", returnclass = "sf")
# Assuma que 'dados' é seu data.frame original com as colunas 'long.utm' e 'lat.utm'
d <- data.frame(lon = dados.sub[,5], lat = dados.sub[,6])
coordinates(d) <- ~lon + lat
proj4string(d) <- CRS("+proj=utm +zone=29 +ellps=WGS84 +datum=WGS84 +units=m +no_defs +towgs84=0,0,0")
CRS.new <- CRS("+proj=longlat +datum=WGS84 +no_defs")
d.longlat <- spTransform(d, CRS.new)
# Combine as coordenadas transformadas com os dados originais
dados.longlat <- cbind(dados.sub, data.frame(coordinates(d.longlat)))
names(dados.longlat)[ncol(dados.longlat)-1] <- "longdec"
names(dados.longlat)[ncol(dados.longlat)] <- "latdec"
# Crie um objeto sf a partir do data.frame transformado
dados.sf <- st_as_sf(dados.longlat, coords = c("longdec", "latdec"), crs = 4326)
# Definir cores para presença e ausência
pal <- colorFactor(palette = c("red", "green"), domain = dados.sf$presenca)
# Define a função para escalar os tamanhos dos pontos
scale_size <- function(x, min_size = 3, max_size = 10) {
scaled_size <- ((x - min(x, na.rm = TRUE)) / (max(x, na.rm = TRUE) - min(x, na.rm = TRUE))) * (max_size - min_size) + min_size
return(scaled_size)
}
# Crie o mapa com a visualização inicial em Portugal
leaflet(dados.sf) %>%
addTiles(urlTemplate = "https://{s}.basemaps.cartocdn.com/rastertiles/voyager/{z}/{x}/{y}.png") %>%
addCircleMarkers(data = dados.sf,
radius = ~scale_size(ANE.EA)*1.5, # Use a coluna escalada para o tamanho dos pontos
color = ~pal(presenca),
fillColor = ~pal(presenca),
fillOpacity = 0.8, # Define a opacidade dos pontos
stroke = FALSE, # Remove os limites dos pontos
label = ~sprintf(
"Year: %s<br>Month: %s<br>SST: %.2f °C<br>CHL: %.2f mg/m³<br>Abundance: %.2f m²/mn²",
year, Month, SST, CHL, ANE.EA
) %>% lapply(htmltools::HTML)) %>%
addLegend("bottomright",
pal = pal,
values = dados.sf$presenca,
title = "Presença",
labels = c("Ausência", "Presença"),
opacity = 1) %>%
setView(lng = -8, lat = 39.5, zoom = 7) # Coordenadas para centralizar em Portugal
# Crie o mapa com a visualização inicial em Portugal
leaflet(dados.sf) %>%
addTiles(urlTemplate = "https://{s}.basemaps.cartocdn.com/rastertiles/voyager/{z}/{x}/{y}.png") %>%
addCircleMarkers(data = dados.sf[dados.sf$year=="2022",],
radius = ~scale_size(ANE.EA)*1.5, # Use a coluna escalada para o tamanho dos pontos
color = ~pal(presenca),
fillColor = ~pal(presenca),
fillOpacity = 0.8, # Define a opacidade dos pontos
stroke = FALSE, # Remove os limites dos pontos
label = ~sprintf(
"SST: %.2f °C<br>CHL: %.2f mg/m³<br>Abundance: %.2f m²/mn²",
SST, CHL, ANE.EA
) %>% lapply(htmltools::HTML)) %>%
addLegend("bottomright",
pal = pal,
values = dados.sf$presenca,
title = "Presença",
labels = c("Ausência", "Presença"),
opacity = 1) %>%
setView(lng = -8, lat = 39.5, zoom = 7) # Coordenadas para centralizar em Portugal